R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <https://rmarkdown.rstudio.com>.

# Install plotly only when it is not already available, so re-running the
# document does not download and reinstall the package every time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
## Installing package into '/usr/local/lib/R/site-library'
## (as 'lib' is unspecified)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the comma-separated video statistics file, stripping leading and
# trailing whitespace from unquoted character fields, then report the table's
# dimensions and column structure.
# read.csv() has the same defaults as read.delim() except for sep = ",".
video_stats <- read.csv("videos-stats.csv", strip.white = TRUE)
dim(video_stats)
## [1] 1881    8
str(video_stats)
## 'data.frame':    1881 obs. of  8 variables:
##  $ X           : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Title       : chr  "Apple Pay Is Killing the Physical Wallet After Only Eight Years | Tech News Briefing Podcast | WSJ" "The most EXPENSIVE thing I own." "My New House Gaming Setup is SICK!" "Petrol Vs Liquid Nitrogen | Freezing Experiment | \340\264\252\340\265\206\340\264\237\340\265\215\340\264\260\340\265\213\340\ ...
##  $ Video.ID    : chr  "wAZZ-UWGVHI" "b3x28s61q3c" "4mgePWWCAmA" "kXiYSI7H2b0" ...
##  $ Published.At: chr  "2022-08-23" "2022-08-24" "2022-08-23" "2022-08-23" ...
##  $ Keyword     : chr  "tech" "tech" "tech" "tech" ...
##  $ Likes       : num  3407 76779 63825 71566 96513 ...
##  $ Comments    : num  672 4306 3338 1426 5155 ...
##  $ Views       : num  135612 1758063 1564007 922918 1855644 ...
# Count the missing values in every column; the result is a single row with
# one NA count per column. across() replaces the superseded summarise_all().
video_stats %>%
  summarise(across(everything(), ~ sum(is.na(.x))))
##   X Title Video.ID Published.At Keyword Likes Comments Views
## 1 0     0        0            0       0     2        2     2
# Discard every row that contains a missing value in any column.
video_stats <- drop_na(video_stats)



# Derive per-video engagement columns:
#   LikesPer1k    - likes per 1000 views, rounded to 2 decimals
#   CommentsPer1k - comments per 1000 views, rounded to 2 decimals
#   TitleLen      - number of characters in the title
video_stats <- mutate(
  video_stats,
  LikesPer1k = round(Likes / (Views / 1000), digits = 2),
  CommentsPer1k = round(Comments / (Views / 1000), digits = 2),
  TitleLen = nchar(Title)
)

# Add the publication year as a factor (first four characters of the
# "YYYY-MM-DD" string), then convert the publication date itself to POSIXct.
# PubYear is computed first because it reads the original character column.
# NOTE(review): as.POSIXct() is called without tz, so the session time zone
# is used - confirm that is intended for date-only values.
video_stats <- mutate(
  video_stats,
  PubYear = as.factor(substr(Published.At, start = 1, stop = 4)),
  Published.At = as.POSIXct(Published.At, format = "%Y-%m-%d")
)

# Bar chart of how many videos were published in each year.
ggplot(video_stats, aes(x = PubYear)) +
  geom_bar(fill = "#765add") +
  labs(
    title = "Number of videos by year",
    x = "Publication Year",
    y = "Count"
  ) +
  theme_minimal()

# Histogram (30 bins) of the title length in characters.
ggplot(video_stats, aes(x = TitleLen)) +
  geom_histogram(bins = 30, fill = "#765add") +
  labs(
    title = "Distribution of title length",
    x = "Title Length (char)",
    y = "frequency"
  ) +
  theme_minimal()

# Line chart of total comments (in thousands) per keyword for each
# publication year, built with ggplot2 and then made interactive with plotly.
plot1 <- video_stats %>%
  # Total comments per keyword per year; divide by 1000 to change the scale.
  group_by(PubYear, Keyword) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # "summarise() has grouped output" message.
  summarize(total_comments = sum(Comments) / 1000, .groups = "drop") %>%
  # Create a ggplot coloured by keyword.
  ggplot(aes(x = PubYear, y = total_comments, color = Keyword)) +
  # PubYear is a factor, so the line grouping has to be stated explicitly.
  # group = Keyword draws one line per keyword; group = 1 would put every
  # point into a single group and join all keywords into one path.
  geom_line(aes(group = Keyword)) +
  geom_point(size = 0.5, alpha = 0.5) +
  ylab("Comment Count") +
  xlab("Published Year") +
  labs(title = "Total Comments by Category Over Time (by 1k)") +
  theme_minimal()
# Convert the static ggplot into an interactive plotly graph.
ggplotly(plot1)
# Interactive bubble chart of likes vs. comments per 1k views, one marker per
# video, coloured by keyword and sized by view count.
plot_ly(
  data = video_stats,
  type = "scatter",
  mode = "markers",
  x = ~LikesPer1k,
  y = ~CommentsPer1k,
  color = ~Keyword,
  # Marker size follows the view count, scaled between the two `sizes` bounds.
  size = ~Views,
  sizes = c(5, 70),
  marker = list(sizemode = "diameter", opacity = 0.5),
  # Show only the custom text below when hovering over a point.
  hoverinfo = "text",
  text = ~paste(
    paste0("Likes per 1k views: ", LikesPer1k),
    paste0("Comments per 1k views: ", CommentsPer1k),
    paste0("Views (100k): ", round(Views / 100000, 2)),
    paste0("Keyword (Category): ", Keyword),
    sep = "<br>"
  )
) %>%
  # Chart title, axis titles and legend heading.
  layout(
    title = "Likes VS Comments per 1k Views",
    xaxis = list(title = "Likes per 1k"),
    yaxis = list(title = "Comments per 1k"),
    legend = list(title = list(text = "<b> Keyword </b>"))
  )
## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Install flexdashboard only when it is not already available, so re-running
# the document does not download and reinstall the package every time.
if (!requireNamespace("flexdashboard", quietly = TRUE)) {
  install.packages("flexdashboard")
}
## Installing package into '/usr/local/lib/R/site-library'
## (as 'lib' is unspecified)